import pandas as pd

# 3.4.1 Encoding data
# Load the source spreadsheet and show it as-is.
df = pd.read_excel('zhiye.xlsx')
print(df)
# pd.get_dummies returns a new frame in which categorical (string/object)
# columns are replaced by one indicator column per distinct value.
df_encoded = pd.get_dummies(df)
print('编码后的数据：', df_encoded)

# 3.4.2 Discretizing continuous data
df1 = pd.read_excel('student_info.xlsx')
print(df1)

# Body-mass index = weight / height squared.
# NOTE(review): assumes '体重' is in kilograms and '身高' in metres — TODO confirm
# against the spreadsheet; heights in centimetres would put every row in the
# lowest bin.
height_squared = df1['身高'] ** 2
df1['体质指数'] = df1['体重'] / height_squared

# Left-closed bins: [0, 18.5), [18.5, 24), [24, 28), [28, 50).
# Values outside [0, 50) are not assigned a label and become NaN.
bmi_bins = [0, 18.5, 24, 28, 50]
bmi_labels = ['消瘦', '正常', '超重', '肥胖']
df1['健康状况'] = pd.cut(
    df1['体质指数'],
    bins=bmi_bins,
    right=False,
    include_lowest=True,
    labels=bmi_labels,
)
print(df1)

# 3.5 Converting and extracting time information
s = '2025-10-23'

# Parse the ISO-formatted string into a pandas Timestamp (midnight of that day).
ss = pd.to_datetime(s)
print(ss)

# Keep only the calendar date (a datetime.date) from the parsed Timestamp.
sss = ss.date()
print(sss)